/**
 * Extracts the text of a PDF and hands it to `add_file_output` as a
 * downloadable plain-text file.
 *
 * The blob is read with a FileReader, parsed with the global `PDFJS`, and each
 * page's text items are joined with single spaces (runs of whitespace are
 * collapsed). Non-empty pages are joined with a blank line between them.
 * `add_file_output` is not defined in this part of the file; it is called with
 * an object URL for the text blob and the file name with a trailing `.pdf`
 * (any case) replaced by `.txt`.
 *
 * @param {Blob} blob - The PDF data to read.
 * @param {string} fileName - Original file name, used to derive the output name.
 * @returns {void} Results and failures are reported through `add_file_output`
 *   and `alert`; nothing is returned.
 */
function processFile(blob, fileName) {
  const reader = new FileReader();

  // Without this handler a failed read would leave the user with no feedback.
  reader.onerror = function () {
    console.error('Failed to read file', reader.error);
    alert('Could not read this PDF file.');
  };

  reader.onload = function (e) {
    PDFJS.workerSrc = '/js/pdf.worker.js';
    PDFJS.getDocument(new Uint8Array(e.target.result))
      .then(function (pdf) {
        const pages = [];
        let chain = Promise.resolve();

        // Pages are processed strictly one after another so that `pages`
        // ends up in document order.
        for (let i = 1; i <= pdf.numPages; i++) {
          chain = chain.then(function () {
            return pdf.getPage(i).then(function (page) {
              return page.getTextContent().then(function (tc) {
                const txt = (tc.items || [])
                  .map(function (it) {
                    return it.str || '';
                  })
                  .join(' ')
                  .replace(/\s+/g, ' ')
                  .trim();
                if (txt) {
                  pages.push(txt);
                }
              });
            });
          });
        }

        // Returning the chain is essential: it routes page-level failures to
        // the .catch() below instead of leaving them as unhandled rejections.
        return chain.then(function () {
          const allText = pages.join('\n\n');
          if (!allText) {
            alert('No readable text found in this PDF.');
            return;
          }
          const txtBlob = new Blob([allText], { type: 'text/plain;charset=utf-8' });
          add_file_output(
            URL.createObjectURL(txtBlob),
            fileName.replace(/\.pdf$/i, '.txt')
          );
        });
      })
      .catch(function (err) {
        // Keep the underlying cause visible to developers.
        console.error('PDF text extraction failed', err);
        alert('Could not read this PDF file.');
      });
  };

  reader.readAsArrayBuffer(blob);
}

// Cache of in-flight / completed script loads, keyed by URL.
// Kept as a `var` holding a plain object so any other code that already reads
// this global keeps working.
var _loadedScripts = {};

/**
 * Loads a script by appending a <script> element to document.head, at most
 * once per URL.
 *
 * Repeated calls with the same URL return the same promise while the load is
 * pending or after it succeeded. A failed load is evicted from the cache (and
 * its element removed) so a later call can retry.
 *
 * @param {string} url - Script URL; also used as the cache key.
 * @returns {Promise<Event>} Resolves with the element's load event, rejects
 *   with its error event.
 */
function loadScriptPromise(url) {
  // Own-property check so keys such as "constructor" do not match members
  // inherited from Object.prototype.
  if (Object.prototype.hasOwnProperty.call(_loadedScripts, url)) {
    return _loadedScripts[url];
  }

  const pending = new Promise(function (resolve, reject) {
    const script = document.createElement('script');
    script.src = url;
    script.onload = resolve;
    script.onerror = function (event) {
      // Do not cache failures: drop the entry and the dead element so the
      // next call starts a fresh attempt.
      delete _loadedScripts[url];
      if (script.parentNode) {
        script.parentNode.removeChild(script);
      }
      reject(event);
    };
    document.head.appendChild(script);
  });

  _loadedScripts[url] = pending;
  return pending;
}

/**
 * Replaces every match of `find` in `str` with `replace`.
 *
 * NOTE: `find` is compiled with `new RegExp(find, 'g')`, so it is interpreted
 * as a regular-expression pattern, not as literal text. Characters such as
 * `.`, `*`, `(` or `$` keep their regex meaning, and `$1`-style sequences in
 * `replace` are expanded by String.prototype.replace.
 *
 * @param {string} find - Regular-expression source to search for.
 * @param {string} replace - Replacement text.
 * @param {string} str - Input string.
 * @returns {string} The string with all matches replaced.
 */
function replaceAll(find, replace, str) {
  return str.replace(new RegExp(find, 'g'), replace);
}

/**
 * Re-indents brace-delimited text (e.g. JSON) so that every `{`, `}` and `,`
 * found outside double-quoted strings starts or ends a line.
 *
 * Lines are separated with '\r' and indented with GetTabs(depth), where depth
 * is the number of currently open braces. Whitespace immediately before and
 * after a structural character is discarded; everything else, including the
 * contents of quoted strings, is copied through unchanged.
 *
 * @param {string} str - Text to format.
 * @returns {string} The formatted text.
 */
function beautify(str) {
  const isLayoutChar = function (ch) {
    return ch === ' ' || ch === '\r' || ch === '\n' || ch === '\t';
  };

  // A quote is escaped only when preceded by an ODD number of backslashes:
  // in `\\"` the backslashes escape each other and the quote is real.
  const isEscapedAt = function (index) {
    let backslashes = 0;
    for (let k = index - 1; k >= 0 && str[k] === '\\'; k--) {
      backslashes++;
    }
    return backslashes % 2 === 1;
  };

  const length = str.length;
  let result = '';
  let depth = 0; // open braces minus closed braces seen so far
  let withinQuotes = false;
  let i = 0;

  while (i < length) {
    const c = str[i];

    if (c === '"' && !isEscapedAt(i)) {
      withinQuotes = !withinQuotes;
    }

    if (!withinQuotes && (c === '{' || c === '}' || c === ',')) {
      // Drop whitespace already emitted before the structural character,
      // using a single slice rather than one copy per removed character.
      let end = result.length;
      while (end > 0 && isLayoutChar(result[end - 1])) {
        end--;
      }
      result = result.slice(0, end);

      if (c === '{') {
        depth++;
        result += c + '\r' + GetTabs(depth);
      } else if (c === '}') {
        depth--;
        // The closing brace goes on its own line at the outer indent level.
        result += '\r' + GetTabs(depth) + c;
      } else {
        result += c + '\r' + GetTabs(depth);
      }

      // Skip whitespace that follows in the input; the line break and
      // indentation were just written explicitly.
      while (i + 1 < length && isLayoutChar(str[i + 1])) {
        i++;
      }
    } else {
      result += c;
    }

    i++;
  }

  return result;
}

/**
 * Builds the indentation prefix for a nesting level.
 *
 * Despite the name, the indent unit visible in this file is a single space
 * character per level.
 * NOTE(review): this may originally have been a tab that was lost when the
 * file's whitespace was collapsed; confirm before changing.
 *
 * @param {number} count - Nesting depth; zero or negative yields ''.
 * @returns {string} `count` indent units.
 */
function GetTabs(count) {
  // repeat() throws on negative counts, so guard to keep the old
  // "non-positive means no indent" behaviour.
  return count > 0 ? ' '.repeat(Math.ceil(count)) : '';
}